Setup

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

# Load libraries
library(tidyverse)
library(cowplot)
library(ggplot2)
library(ggpattern)
library(ggsci)
library(utils)
library(reshape2)
library(stringr)
library(gridGraphics)
library(zellkonverter)
library(SingleCellExperiment)
library(cytomapper)
library(scater)
library(ggpubr)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis"

# Set path to masks
masks_tonsil.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
#masks_lung.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")

Different Datasets

Next, we look at U using different training datasets. For this, we read in the results from CISI training on the Tonsil th152 and the Immucan lung dataset (tonsil_vs_lung.py).

## Read results
# Read in all results for tonsil and lung comparison into one dataframe
tonsil_vs_lung.files <- list.files(analysis.path, 'simulation_results.txt',
                                         full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
tonsil_vs_lung.files <- tonsil_vs_lung.files[grepl(
  "tonsil_vs_lung|Tonsil_th152/training/full|Immucan_lung/training/subset", tonsil_vs_lung.files)]
tonsil_vs_lung.res <- lapply(tonsil_vs_lung.files, read_results, type="u") %>% 
  bind_rows() 

# Get information about best U files
tonsil_vs_lung.u_files <- tonsil_vs_lung.res %>% 
  dplyr::filter(dataset==training) %>% 
  dplyr::select(k, training, `Best crossvalidation fold`, datasize) %>% 
  distinct()

# Harmonize all dataset names
patterns <- unique(unlist(lapply(tonsil_vs_lung.res$training, function(name){
  if(length(str_split(name, "_")[[1]])==1){
    name
  }
})))

tonsil_vs_lung.res$training <- unlist(lapply(tonsil_vs_lung.res$training, function(pat){
  patterns[str_detect(pat, fixed(patterns, ignore_case=TRUE))]
}))
tonsil_vs_lung.res$dataset <- unlist(lapply(tonsil_vs_lung.res$dataset, function(pat){
  patterns[str_detect(pat, fixed(patterns, ignore_case=TRUE))]
}))

## Read U
tonsil_vs_lung.u <- lapply(1:(nrow(tonsil_vs_lung.u_files)), function(i){
  file <- file.path(analysis.path, tonsil_vs_lung.u_files[i, "training"], "training",
                    tonsil_vs_lung.u_files[i, "datasize"], 
                    paste0("k_", tonsil_vs_lung.u_files[i, "k"]),
                    paste0("crossvalidation_", tonsil_vs_lung.u_files[i, "Best crossvalidation fold"]),
                    "gene_modules.csv")
  u_temp <- read_U(file, "training")
}) %>% bind_rows() %>%
  dplyr::rename(dataset=rep)

## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
tonsil_vs_lung_X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
tonsil_vs_lung_X.files <- tonsil_vs_lung_X.files[grepl(
  "tonsil_vs_lung|Tonsil_th152/training/full|Immucan_lung/training/subset", tonsil_vs_lung_X.files)]

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing, which k was used and if it
# is the ground truth or simulated X)
# Remove neg. values and transform counts?
tonsil_vs_lung_X.list <- lapply(tonsil_vs_lung_X.files, read_results, type="x")
tonsil_vs_lung_X.list <- lapply(tonsil_vs_lung_X.list, function(sce.temp){
  pat <- metadata(sce.temp)
  metadata(sce.temp)$dataset <- patterns[str_detect(pat$dataset, 
                                                    fixed(patterns, ignore_case=TRUE))]
  metadata(sce.temp)$training <- patterns[str_detect(pat$training, 
                                                     fixed(patterns, ignore_case=TRUE))]
  # TODO: Remove negative values? Add transformed counts?
  # counts(sce.temp) <- pmax(counts(sce.temp), 0)
  assay.name <- names(assays(sce.temp))
  for (a in assay.name[-1]) {
    assay(sce.temp, a) <- NULL
  }
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})

## Read masks
# Read in masks for tonsil data
masks.tonsil <- loadImages(masks_tonsil.path, as.is = TRUE)
mcols(masks.tonsil) <- DataFrame(sample_id=names(masks.tonsil))

Results

# For each different measurement of training results, plot a barplot comparing
# diff. k-sparsities, simulation types and training-test set combinations
for (measure in names(tonsil_vs_lung.res)[2:10]) {
  cat('##### ', measure, '\n')
  
  # Reorder dataframe for plotting
  data_temp <- tonsil_vs_lung.res %>%
    dplyr::select(dataset, training, simulation, k, !!measure) %>%
    mutate(group=paste(training, dataset, sep="_"))
  
  # Create barplot
  tonsil_vs_lung.barplot <- ggplot(data_temp, aes(x=group, y=!!sym(measure), fill=k, pattern=simulation)) +
    geom_bar_pattern(stat="identity",
                   position=position_dodge(preserve="single"),
                   width=0.6,
                   color="black", 
                   pattern_fill="black",
                   pattern_angle=45,
                   pattern_density=0.3,
                   pattern_spacing=0.01,
                   pattern_key_scale_factor=0.6) +
    scale_y_continuous(limits=c(0, 1)) +
    scale_fill_npg() +
    scale_pattern_manual(values=c(composite="stripe", noisy="none"), 
                         labels=c("Composite", "Noisy")) +
    labs(x="Training_Test Dataset", y=str_to_title(measure), pattern="Simulation Type") + 
    guides(pattern=guide_legend(override.aes=list(fill="white")),
           fill=guide_legend(override.aes=list(pattern="none"))) +
    theme_cowplot()
  
  print(tonsil_vs_lung.barplot)
  cat("\n\n")
}
Overall pearson

Overall spearman

Gene average

Sample average

Sample dist pearson

Sample dist spearman

Gene dist pearson

Gene dist spearman

Matrix coherence (90th ptile)

Heatmap of U for Different Datasets

tonsil_vs_lung.cor <- plot_U(tonsil_vs_lung.u, "k", "dataset")
k = 1

k = 3

Module Comparison for Different Datasets

plot_U_membership(tonsil_vs_lung.u, "k", "dataset")
k = 1

k = 3

Mantel Test between U for Different Datasets

tonsil_vs_lung.mantel <- lapply(tonsil_vs_lung.cor, function(l){
  mantel_test(l[[1]], l[[2]])$mantel_r
  }) %>%
  as.data.frame(col.names=names(tonsil_vs_lung.cor), check.names=FALSE)

knitr::kable(tonsil_vs_lung.mantel, digits=2,
             col.names=paste0("k = ", names(tonsil_vs_lung.mantel)))
k = 1 k = 3
0.67 0.42

CD20 of Test Image (Tonsil th152)

tonsil_vs_lung_X.plotList <- keep(tonsil_vs_lung_X.list, function(x){
  metadata(x)$training=="tonsil" & metadata(x)$dataset=="tonsil" & metadata(x)$k=="1"})
names(tonsil_vs_lung_X.plotList) <- lapply(tonsil_vs_lung_X.plotList, 
                                           function(x){metadata(x)$ground_truth})
tonsil_vs_lung_X.imgList <- plot_cells(tonsil_vs_lung_X.plotList, masks.tonsil, 
                                       "CD20")
tonsil_vs_lung_X.img <- plot_grid(plotlist=append(tonsil_vs_lung_X.imgList[grepl("20220520_TsH_th152_cisi1_002",
                                                                                       names(tonsil_vs_lung_X.imgList))],
                                                        tonsil_vs_lung_X.imgList[grepl("legend",
                                                                                       names(tonsil_vs_lung_X.imgList))]),
                                          ncol=2, labels=names(tonsil_vs_lung_X.plotList),
                                          label_size=15, hjust=c(-2, -1.5), 
                                          vjust=1, scale=0.9)
tonsil_vs_lung_X.img

CD20 of Test Image Overlayed (Tonsil th152)

tonsil_vs_lung_X.plotListRenamed <- lapply(names(tonsil_vs_lung_X.plotList), function(n){
  rownames(tonsil_vs_lung_X.plotList[[n]]) <- paste(rownames(tonsil_vs_lung_X.plotList[[n]]),
                                                    n, sep="\n")
  tonsil_vs_lung_X.plotList[[n]]
})
names(tonsil_vs_lung_X.plotListRenamed) <- names(tonsil_vs_lung_X.plotList)
tonsil_vs_lung_X.overlayed <- do.call("rbind", tonsil_vs_lung_X.plotListRenamed)

pois <- c("CD20\nsimulated", "CD20\nground_truth")

tonsil_vs_lung_X.imgDiff <- plotCells(mask=masks.tonsil[unique(colData(tonsil_vs_lung_X.overlayed)$sample_id)], 
                                      object=tonsil_vs_lung_X.overlayed,
                                      cell_id="ObjectNumber", img_id="sample_id", colour_by=pois,
                                      return_plot=TRUE,  image_title=list(cex=1),
                                      scale_bar=list(cex=1, lwidth=5),
                                      legend=list(colour_by.title.cex=0.7, colour_by.labels.cex=0.7))

ggdraw(tonsil_vs_lung_X.imgDiff$plot)

Scatterplots of asinh Transformed Counts

Plot of asinh transformed counts coloured by defined celltype.

tonsil_vs_lung_X.exprsList <- keep(tonsil_vs_lung_X.list, function(x){
  metadata(x)$training=="lung" & metadata(x)$dataset=="lung" & metadata(x)$k=="1"})
names(tonsil_vs_lung_X.exprsList) <- lapply(tonsil_vs_lung_X.exprsList,
                                            function(x){metadata(x)$ground_truth})

tonsil_vs_lung_X.Exprs <- plot_grid(plot_exprs(tonsil_vs_lung_X.exprsList, "B", "CD3", "CD20"),
                                    plot_exprs(tonsil_vs_lung_X.exprsList, "BnT", "CD3", "CD20"),
                                    plot_exprs(tonsil_vs_lung_X.exprsList, "Neutro", "MPO", "CD15"),
                                    ncol=1)
print(tonsil_vs_lung_X.Exprs)

Results per Protein for Different Datasets

For k=1.

tonsil_vs_lung_X.df <- lapply(tonsil_vs_lung_X.list, function(sce){
  counts.long <- as.data.frame(counts(sce)) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable) %>%
    mutate(k=metadata(sce)$k,
           dataset=paste(metadata(sce)$training, metadata(sce)$dataset, sep="_"))
  
}) %>% bind_rows() %>%
  group_by(protein, cell, k, dataset) %>%
  summarise_all(na.omit)

for (i in unique(tonsil_vs_lung_X.df$dataset)) {
  cat('#####', i, '\n')

  tonsil_vs_lung.proteinPlot <- ggscatter(tonsil_vs_lung_X.df %>%
                                         dplyr::filter(k=="1" & dataset==i), 
                                         x="ground_truth", y="simulated",
                                         add="reg.line",
                                         color=pal_npg("nrc")("1"),
                                         add.params=list(color=pal_npg("nrc")("4")[4],
                                                         size=2)) +
    facet_wrap(~ protein, scales="free", ncol=5) +
    theme_cowplot(10) +
    geom_abline(slope=1, linetype="dashed") +
    stat_cor(size=2)
  
  print(tonsil_vs_lung.proteinPlot)
  cat('\n\n')
}
lung_lung

tonsil_lung

lung_tonsil

tonsil_tonsil